'''
Author: LHY 2601958675@qq.com
Date: 2023-06-30 11:32:29
LastEditors: LHY
LastEditTime: 2023-07-05 15:31:04
Description: 抓取天堂电影
'''
import requests
import re

# Output files, opened (and truncated) as soon as the module is loaded.
# `f`  -> dy2023list.csv       (nothing visible in this file currently writes to it)
# `f2` -> dy2023list-down.csv  (rows written by dymovies())
f = open('dy2023list.csv', 'w', encoding='utf-8')
f2 = open('dy2023list-down.csv', 'w', encoding='utf-8')
def dymovies(p):
    """Scrape the "2023必看热片" list from dy2018.com and save download links.

    Fetches the site's home page, extracts the ``<ul>`` that follows the
    "2023必看热片" heading, visits every detail page linked from it, and
    writes one ``movie_name,download_url`` CSV row per movie to the
    module-level ``f2`` file. The download URL and movie name are also
    printed for each movie.

    Detail pages that cannot be fetched or do not match the expected layout
    are reported and skipped so one bad page does not abort the whole run.

    Args:
        p: Value printed at the start of the run; it is not otherwise used.

    Raises:
        requests.RequestException: If the home page request fails.
    """
    import csv
    from urllib.parse import urljoin

    print(p)
    url = "https://dy2018.com/"
    headers = {"Referer":"https://dy2018.com/","User-Agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"}
    # Without a timeout, requests waits indefinitely on a stalled connection.
    timeout = 10

    respon = requests.get(url, headers=headers, timeout=timeout)
    respon.encoding = "gbk"  # the site is decoded as GBK, not UTF-8

    # Isolate the <ul> that follows the "2023必看热片" heading.
    section_re = re.compile(r"2023必看热片.*?<ul>(?P<html>.*?)</ul>",re.S)
    section = section_re.search(respon.text)
    if section is None:
        print(f"'2023必看热片' section not found on {url}; nothing to do")
        return
    html = section.group("html")

    # One <li> per movie: relative detail-page link plus title.
    # NOTE: writing these (url, title) index rows to a separate file was
    # disabled in the original code and is intentionally left out here.
    item_re = re.compile(r'''<li><a href='(?P<url>.*?)' title="(?P<title>.*?)">.*?</li>''',re.S)

    # On a detail page: the "◎片　　名" line carries the movie name and the
    # first link in the highlighted table cell is the download URL.
    detail_re = re.compile(r'<div id="Zoom">.*?◎片　　名(?P<movies_name>.*?)<br />.*?<td style="WORD-WRAP: break-word"'
                           r' bgcolor="#fdfddf"><a href="(?P<downloadUrl>.*?)">',re.S)

    # csv.writer quotes fields containing commas/quotes, which a plain
    # f-string join would silently turn into a malformed row.
    writer = csv.writer(f2, lineterminator="\n")

    for item in item_re.finditer(html):
        # urljoin copes with "/path", "path" and absolute hrefs alike.
        uri = urljoin(url, item.group("url").strip())
        try:
            # Send the same headers as for the home page.
            detail = requests.get(uri, headers=headers, timeout=timeout)
        except requests.RequestException as err:
            print(f"skipping {uri}: request failed ({err})")
            continue
        detail.encoding = "gbk"

        found = detail_re.search(detail.text)
        if found is None:
            print(f"skipping {uri}: page layout not recognised")
            continue

        # strip() must be *called*; the bare attribute would write the
        # bound-method repr into the CSV instead of the name.
        movie = found.group("movies_name").strip()
        down = found.group("downloadUrl")
        print(down)
        print(movie)

        writer.writerow([movie, down])

    # f2 is opened outside this function and not closed here; flush so the
    # rows reach disk even if the handle is never explicitly closed.
    f2.flush()
# Run the scraper as soon as the module is loaded; the argument is only
# printed by dymovies() and has no other effect.
dymovies(0)